GOAL:

Create Files Directory

# Build the file directory table and pass it straight through the directory
# transformations; the result is reused by the sampling and metrics steps.
directory_df <- directory_transformations(create_file_directory())

Read Manifests

# Load the leaderboard CSV and apply the leaderboard feature transformations.
lb_df <- lb_feature_transformations(
  read.csv("./data/leaderboard/leaderboard.csv")
)
# Load the achievements manifest as-is (no transformation applied here).
achievements_manifest <- read.csv(
  "./data/manifest/achievements_manifest.csv"
)

Read Sample of Gamers

# Fix the RNG seed before sampling so the draw is repeatable.
set.seed(196)
# Draw the gamer sample from the directory.
# NOTE(review): the first argument (200) is presumably the sample size —
# confirm against sample_random_gamers().
rnd_gamer_sample <- sample_random_gamers(200, directory_df = directory_df)
# Reindex every element of the sample with the sort order of its third
# element, so that all elements share the same ordering.
# NOTE(review): element 3 looks like a per-gamer key — confirm.
rnd_gamer_sample <- lapply(
  rnd_gamer_sample,
  `[`,
  order(rnd_gamer_sample[[3]])
)

Achievement Transformations

  • Removes character entries for achievement earned.
  • Formats date and adds formatted columns for month, day of year and isoweek.
  • Creates a column for tracking weekend / weekday.
# Run the achievement transformations, in order, on the first element of the
# sample. Each step consumes the output of the previous one, so the order
# below must be preserved.
# Steps 1-2 also take directory_df as a second argument.
rnd_gamer_sample[[1]] <- achievement_transform_today(
  rnd_gamer_sample[[1]], directory_df
)
rnd_gamer_sample[[1]] <- achievement_transform_yesterday(
  rnd_gamer_sample[[1]], directory_df
)
# Steps 3-5 work on the achievement data alone.
rnd_gamer_sample[[1]] <- achievement_transform_drop_offline(
  rnd_gamer_sample[[1]]
)
rnd_gamer_sample[[1]] <- achievement_transform_format_dates(
  rnd_gamer_sample[[1]]
)
rnd_gamer_sample[[1]] <- achievement_transform_extract_dates(
  rnd_gamer_sample[[1]]
)

Game Transformations

# Run the game transformations on the second element of the sample:
# first drop bad titles, then apply the hours transformation to what remains.
rnd_gamer_sample[[2]] <- games_transform_hours(
  games_transform_drop_bad_titles(rnd_gamer_sample[[2]])
)

Metrics Preprocessing (Total)

# Report the total observation count for the (transformed) achievement data.
print(
  paste(
    "TOTAL OBSERVATIONS:",
    get_total_observations(rnd_gamer_sample[[1]])
  )
)
## [1] "TOTAL OBSERVATIONS: 401773"
# Build the metrics table used by the summary plots further down.
metrics_df <- process_metrics_df(rnd_gamer_sample, directory_df)

Frequency Data Preprocessing

# Compute the frequency tables from the sample, then stack them into a single
# data frame; bind_rows() records each table's origin in "data_frame_id".
frequency_dfs <- achievement_calculate_frequencies(rnd_gamer_sample)
frequency_combined_df <- bind_rows(frequency_dfs, .id = "data_frame_id")
# Convert the id column written by bind_rows() to numeric.
frequency_combined_df[["data_frame_id"]] <- as.numeric(
  frequency_combined_df[["data_frame_id"]]
)
# Derive the daily-achievement table and pass it through da_fill_dates().
da_df <- da_fill_dates(calculate_daily_achievements(frequency_combined_df))

# Split the daily-achievement table by profile and set churn on the result.
# The churn step prints a message for profiles it drops (see output below).
da_profiles <- da_profiles_set_churn(da_split_by_profile(da_df))
## [1] "PROFILE: 150 DROPPED (All NA values)"
# Add days-of-existence, then the daily, weekly and monthly EIR calculations.
# Each step takes the previous step's output, so keep this order.
da_profiles <- da_profiles_set_days_existence(da_profiles)
da_profiles <- calculate_daily_lt_eir(da_profiles)
da_profiles <- calculate_weekly_eir_all(da_profiles)
da_profiles <- calculate_monthly_eir_all(da_profiles)

Leaderboard Frequency Plot

# Interactive leaderboard plot of the "Score" column.
# NOTE(review): the numeric arguments are presumably range start, range end
# and step/bin width — confirm against plot_lb_range_interactive().
plot_lb_range_interactive(lb_df, "Score", 0, 4e6, 1e6)
Leaderboard Interactive Histogram

Leaderboard Interactive Histogram

Frequency Plots by Profile

## PhantomJS not found. You can install it with webshot::install_phantomjs(). If it is installed, please make sure the phantomjs executable can be found via the PATH variable.
Shiny applications not supported in static R Markdown documents

Churned @ 365 Days Histogram

# Bar chart of churn status with a distinct colour per status.
# Colours are bound to the factor levels by name, so they stay stable even if
# one level is absent from the data; NA is coloured through `na.value`,
# because an unnamed third entry in `values` is never applied to missing
# values.
# NOTE(review): assumes `churned` is logical (levels "FALSE"/"TRUE") — confirm.
ggplot(metrics_df, aes(x = churned, fill = factor(churned))) +
  geom_bar(color = "white") +
  scale_fill_manual(
    values = c("FALSE" = "darkgreen", "TRUE" = "darkred"),
    na.value = "gray"
  ) +
  labs(title = "Churned Histogram (365 Days Since Last Achievement)", x = "Churned Status", y = "Count")

Longest Streak Histogram

# Bar chart counting profiles per longest-streak value; each distinct streak
# length gets its own fill colour.
ggplot(
  metrics_df,
  aes(x = longest_streak, fill = factor(longest_streak))
) +
  geom_bar(color = "white") +
  ggtitle("Streak Histogram") +
  xlab("Longest Streak (in Days)") +
  ylab("Count")

Game Time Box Plot

# Box plot of total game time, converted from minutes to hours.
# coord_flip() lays the box out horizontally, so the hours appear along the
# horizontal axis.
ggplot(
  metrics_df,
  aes(x = "", y = total_game_time_minutes / 60, fill = "Game Time")
) +
  geom_boxplot(
    width = 0.5,
    position = position_dodge(width = 0.9),
    color = "black",
    outlier.color = "darkred",
    outlier.shape = 16,
    outlier.size = 3
  ) +
  scale_fill_manual(values = "#FF7F00") +
  scale_y_continuous(labels = scales::comma) +
  coord_flip() +
  labs(x = "", y = "Game Time (Hours)", fill = "") +
  theme(legend.position = "top", legend.title = element_blank())

App Time Box Plot

# Box plot of total app time (minutes converted to hours), restricted to
# profiles with a strictly positive app time. The caption reports how many
# zero-valued rows were left out.
# which() is used for the row filter so that NA app times are dropped instead
# of being turned into all-NA rows, and na.rm = TRUE keeps the zero count in
# the caption from collapsing to NA when the column contains missing values.
ggplot(
  metrics_df[which(metrics_df$total_app_time_minutes > 0), ],
  aes(x = "", y = total_app_time_minutes / 60, fill = "App Time")
) +
  geom_boxplot(width = 0.5, position = position_dodge(width = 0.9), color = "black", outlier.color = "darkblue", outlier.shape = 16, outlier.size = 3) +
  labs(
    x = "",
    y = "App Time (Hours)",
    fill = "",
    caption = paste(
      "Number of Zero Values Filtered Out:",
      sum(metrics_df$total_app_time_minutes == 0, na.rm = TRUE)
    )
  ) +
  scale_fill_manual(values = "#1F78B4") +
  theme(legend.position = "top", legend.title = element_blank()) +
  scale_y_continuous(labels = scales::comma) +
  # Flip so the box is drawn horizontally.
  coord_flip()

Game vs App Time Scatter Plot

# Scatter plot of total game time against total app time, both converted from
# minutes to hours. Points are coloured by app time on a blue-to-red gradient.
ggplot(
  metrics_df,
  aes(
    x = total_game_time_minutes / 60,
    y = total_app_time_minutes / 60,
    color = total_app_time_minutes / 60
  )
) +
  geom_point() +
  scale_color_gradient(low = "blue", high = "red") +
  scale_x_continuous(labels = scales::comma) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Total Time: Game vs App (Hours)",
    x = "Total Game Time (Hours)",
    y = "Total App Time (Hours)",
    color = "Total App Time (Hours)"
  )